Linear Regression

Imports


In [75]:
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
plt.rcParams['figure.figsize'] = (20.0, 10.0)

Data


In [53]:
# Load the Boston housing dataset.
boston = load_boston()
X, y = boston['data'], boston['target']
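
load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so the cell above assumes an older scikit-learn. On newer releases, a rough equivalent is the fetch-it-yourself snippet from scikit-learn's deprecation notice (assuming network access to the CMU mirror), which produces the same X and y:

In [ ]:
# Fallback when load_boston is unavailable (scikit-learn >= 1.2).
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]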

In [54]:
X.shape


Out[54]:
(506, 13)

In [55]:
# Keep the first 30 rows of feature column 1 (ZN) for a small univariate example.
X = X[:30, 1]

In [62]:
# Match the target to the same 30 rows.
y = y[:30]

In [56]:
X.shape


Out[56]:
(30,)

In [57]:
X


Out[57]:
array([ 18. ,   0. ,   0. ,   0. ,   0. ,   0. ,  12.5,  12.5,  12.5,
        12.5,  12.5,  12.5,  12.5,   0. ,   0. ,   0. ,   0. ,   0. ,
         0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,
         0. ,   0. ,   0. ])

In [59]:
# Stack a row of ones (intercept term) on top of the feature values.
X = np.array([np.ones(X.shape[0]), X])

In [67]:
# Transpose so each row is [1, x_i], giving a (30, 2) design matrix.
X = X.T

In [70]:
X.shape


Out[70]:
(30, 2)
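
The last two cells build the design matrix by stacking a row of ones with the feature and transposing; np.column_stack does the same thing in one step (a sketch; X_feat and X_design are illustrative names, not used later):

In [ ]:
# Intercept column of ones followed by the ZN feature, giving shape (30, 2).
X_feat = boston['data'][:30, 1]
X_design = np.column_stack((np.ones(X_feat.shape[0]), X_feat))
X_design.shape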

Functions


In [96]:
def cost_function(X, Y, B=None):
    # Mean squared error cost: J(B) = sum((X.B - Y)^2) / (2 * m).
    if B is None:
        B = np.zeros(X.shape[1])

    m = len(Y)
    J = np.sum((X.dot(B) - Y) ** 2) / (2 * m)
    return J
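
cost_function is the usual mean squared error cost, J(B) = sum((X.B - Y)^2) / (2m). A tiny hand-checkable example (X_toy and y_toy are made-up numbers, not notebook data):

In [ ]:
# y = 2 + 3x exactly, so B = [2, 3] gives zero cost and the zero vector gives
# (4 + 25 + 64) / (2 * 3) = 15.5.
X_toy = np.array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0]])
y_toy = np.array([2.0, 5.0, 8.0])
cost_function(X_toy, y_toy, np.array([2.0, 3.0])), cost_function(X_toy, y_toy)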

In [131]:
def gradient_descent(X, Y, theta=None, alpha=0.0001, iterations=1000):
    # Batch gradient descent on the mean squared error cost.
    if theta is None:
        theta = np.zeros(X.shape[1])

    m = len(Y)
    cost_history = [0] * iterations

    for iteration in range(iterations):
        hypothesis = X.dot(theta)          # predictions X.theta
        loss = hypothesis - Y              # residuals
        gradient = X.T.dot(loss) / m       # gradient of J with respect to theta
        theta = theta - alpha * gradient   # update step
        cost_history[iteration] = cost_function(X, Y, theta)

    return theta, cost_history
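
At this point X and y are still the 30-row univariate slice, so the loop can be sanity-checked against NumPy's closed-form least-squares solution (a sketch; theta_ls and theta_gd are illustrative names):

In [ ]:
# Closed-form least squares for comparison; with a suitable alpha and enough
# iterations, gradient descent should drift toward the same coefficients.
theta_ls, *_ = np.linalg.lstsq(X, y, rcond=None)
theta_gd, history = gradient_descent(X, y, alpha=0.0001, iterations=1000)
theta_ls, theta_gd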

In [132]:
# Rebuild X and y from the full dataset and prepend a column of ones for the intercept.
X = boston.data
y = boston.target
m = X.shape[0]
X = np.column_stack((np.ones(m), X))   # shape (506, 14)
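
The raw Boston columns differ in scale by orders of magnitude (TAX is in the hundreds while NOX stays below one), which makes a single learning rate like 0.0001 hard to tune for gradient descent. A common workaround is to standardize the non-intercept columns first; a minimal sketch on the X built above (X_scaled is an illustrative name, not used elsewhere):

In [ ]:
# Standardize every column except the leading intercept column of ones,
# so a single learning rate works reasonably across features.
X_scaled = X.copy()
X_scaled[:, 1:] = (X[:, 1:] - X[:, 1:].mean(axis=0)) / X[:, 1:].std(axis=0)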

In [133]:
cost_function(X, y)


Out[133]:
296.07345849802368

In [136]:
newB, cost_history = gradient_descent(X, y, iterations=1000)
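
cost_history gives a quick way to check whether the chosen alpha is actually decreasing the cost or diverging; a minimal plot, using the matplotlib setup imported at the top:

In [ ]:
# The cost should fall roughly monotonically when alpha is small enough.
plt.plot(cost_history)
plt.xlabel('iteration')
plt.ylabel('cost J')
plt.show()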

In [ ]: